## Creating Heterogeneous Groups for Team-Based Learning
## Ryan T. Moore
## 1 August 2014

## This file is to replicate 
## Moore, Ryan T. ``Overcoming Barriers to Heterogeneous-Group 
## Learning in the Political Science Classroom''. PS: Political 
## Science & Politics, 47(X):XX–XX, Forthcoming, 2014.

setwd("..")

## Load libraries:
library(blockTools)
library(plyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(reshape)

######### Section 4 ######### 

## Load Class 1 Semester Data:
load("data/class1.RData")
## Create groups:
b <- block(class1, id.vars = "maskname", block.vars = "semester", n.tr = 10)

## Assess Quality of Teams as Created:
class1$team <- NA
for(i in 1:nrow(class1)){
  class1$team[i] <- ceiling(which(b$blocks[[1]] == class1$maskname[i])/nrow(b$blocks[[1]]))
}
rm(i)
## Every team has 5 \leq mean(semester) \leq 6:
ddply(class1, .(team), summarise, meanSem = mean(semester))

## What if We Had Just Randomly Allocated to Teams?
## Compare to 1000 random allocations:
n.rands <- 1000
store.means <- data.frame(i = NA, grp.tmp = NA, mean = NA)
storeF <- storeKW <- array(NA)
set.seed(22)
for(i in 1:n.rands){
  class1$grp.tmp <- sample(c(rep(1:10, 3), 1:8))
  store.means <- rbind(store.means, data.frame(cbind(i, ddply(class1, .(grp.tmp), summarise, mean = mean(semester)))))
  storeF[i] <- oneway.test(semester ~ grp.tmp, data = class1)$statistic
  storeKW[i] <- kruskal.test(semester ~ grp.tmp, data = class1)$statistic
}

## Summarise:
class.summ <- ddply(class1, .(team), summarise, mean = mean(semester))
store.means <- rbind(data.frame(i = 0, grp.tmp = class.summ$team, mean = class.summ$mean), store.means)
store.means <- store.means[!is.na(store.means$i), ]
store.means.all <- ddply(store.means, .(i), transform, range = max(mean)-min(mean))
## How many of 1000 have range one or less:
sum(store.means.all$range <= 1)/10 - 1 ## -1 for class.
## Trim to first 100 for Figure:
store.means.small <- store.means[1:1010, ]
store.means.small <- ddply(store.means.small, .(i), transform, range = max(mean)-min(mean))
## Next smallest/Largest range in Figure:
sort(unique(store.means.small$range))

## Overall mean semester of class:
mean(class1$semester)

## Figure 1 Color:
p <- ggplot(store.means.small, aes(x = as.factor(reorder(i, range)), y = mean)) + geom_violin(aes(fill = as.factor(i)==0)) + labs(y="Mean Semester within Groups", x = "Random Allocation") +  theme(legend.position="none") + theme(axis.ticks = element_blank(), axis.text.x = element_blank())
p + geom_hline(yintercept = mean(class1$semester), col = "blue", lty = 2) + geom_segment(aes(x = 8, y = 4.2, xend = 1.2, yend = 5.5), arrow = arrow(length = unit(0.5, "cm"))) + annotate("text", label = "Actual 
                                                                                                                                                                                  Randomization", x = 8, y = 4)

## Figure 1 Black and White:
p <- ggplot(store.means.small, aes(x = as.factor(reorder(i, range)), y = mean)) + geom_violin(aes(fill = as.factor(i)==0)) + scale_fill_manual(values = c("#CCCCCC", "#666666")) + labs(y="Mean Semester within Groups", x = "Random Allocation") +  theme(legend.position="none") + theme(axis.ticks = element_blank(), axis.text.x = element_blank())

## pdf("figs/meansViolinBW.pdf", width = 14)
p + geom_hline(yintercept = mean(class1$semester), col = "black", lty = 2) + geom_segment(aes(x = 8, y = 4.2, xend = 1.2, yend = 5.5), arrow = arrow(length = unit(0.5, "cm"))) + annotate("text", label = "Actual 
                                                                                                                                                                                  Randomization", x = 8, y = 4)
## dev.off()

## $F$-test of all means equal:
oneway.test(semester ~ team, data = class1)
## Kruskal-Wallis rank sum test:
kruskal.test(semester ~ team, data = class1)


######### Section 5 ######### 

## Load function:
source("code/drawGroups.R")

## 1-covariate Example:
set.seed(452)
d.out <- draw_groups(n = 24, num.groups = 6, bl.vars = "isMale", n.rands = 1000)
## Summary:
summary(d.out$blocked) ## all perfectly split
summary(d.out$crand) ## usually between 1/3 and 2/3 of groups are evenly split
mean(d.out$crand==1) ## only 1.7% are perfect -- versus 100% for bT
mean(d.out$allmw > 0) ## 38.8% have at least one group of all men or all women

## 2-covariate Example:
set.seed(253)
d.out <- draw_groups(n = 24, num.groups = 6, bl.vars = c("isMale", "year"), n.rands = 1000)

## Figure 2:
tmp <- data.frame(bl = c(sort(d.out$pvKWbl[, 1]), sort(d.out$pvKWbl[, 2])), cr = c(sort(d.out$pvKWcr[, 1]), sort(d.out$pvKWcr[, 2])))
tmp$varnum <- rep(c("Gender", "Year"), each = nrow(tmp)/2)
## pdf("figs/qq2vars2BW.pdf", width = 14) 
ggplot(tmp, aes(x=cr, y = bl)) + geom_line(color = "black", size = 2) + xlim(0,1) + ylim(0,1) + geom_abline(xintercept = 0, slope = 1, linetype = 2) + labs(x= "Random Allocation's KW p-value", y = "Our Procedure's KW p-value") + facet_wrap(~ varnum) 
## dev.off()

## Summary:
100*mean(d.out$blocked == 1) ## percent of the 
d.out$call$n.rands ## classes have all 
d.out$call$num.groups ## evenly divided between men and women.  By contrast, only 
100*mean(d.out$crand == 1) ## percent of the 
d.out$call$n.rands  ## classes have all groups equally split between men and women.

## Methodology class Example:
load("data/class2.RData")
source("code/evaluateLabs.R")
bv <- c("math", "describes", "anxious", "stats", "gender")
table(class2$Lab)
set.seed(686)
lab.c <- evaluateLabs(class2[class2$Lab == "C", ], n.tr = 4, bl.vars = bv, w = c(.5, .2, .1, .1, .1), n.rands = 1000)

## Plotting function:
plot.ps <- function(dat = lab.c, var = "math"){
  tmp <- dat[[2]]
  var.tmp <- var
  tmp2 <- data.frame(bl = sort(tmp$dm.p[tmp$method == "bl" & tmp$variable == var.tmp]), 
                     cr = sort(tmp$dm.p[tmp$method == "cr" & tmp$variable == var.tmp]))
  ggplot(tmp2, aes(x=cr, y = bl)) + geom_point() + geom_abline(intercept = 0, slope = 1, color = "black", linetype = 2) + xlim(0,1) + ylim(0,1) + xlab("Random Allocation's KW p-values") + ylab("Our Procedure's KW p-values") 
}

bv.pretty <- c("Math Exp", "Computing Skill", "Anxiety", "Stats Exp", "Gender")
for(i in 1:5){
  assign(paste("qq", i, sep = ""), plot.ps(var = bv[i]) + annotate("text", label = bv.pretty[i], .5, .99))
}

## pdf("figs/labsQQplots.pdf", width = 14)
grid.arrange(qq1, qq2, qq3, qq4, qq5, ncol = 3)
## dev.off()
rm(bv, bv.pretty, i, n.rands, storeF, storeKW)


## 5-covariate Example:
set.seed(253)
d.out <- draw_groups(n = 24, num.groups = 6, bl.vars = c("isMale", "year", "midterm", "polknowl", "distance"), n.rands = 1000)
## blockTools better balances than does random allocation:
tmp <- data.frame(bl = c(sort(d.out$pvKWbl[, 1]), sort(d.out$pvKWbl[, 2]), sort(d.out$pvKWbl[, 3]), sort(d.out$pvKWbl[, 4]), sort(d.out$pvKWbl[, 5])), cr = c(sort(d.out$pvKWcr[, 1]), sort(d.out$pvKWcr[, 2]), sort(d.out$pvKWcr[, 3]), sort(d.out$pvKWcr[, 4]), sort(d.out$pvKWcr[, 5])))
tmp$varnum <- rep(c("Gender", "Year", "Midterm", "Pol Knowledge", "Distance"), each = nrow(tmp)/5)
ggplot(tmp, aes(x=cr, y = bl)) + geom_line(color = "red") + xlim(0,1) + ylim(0,1) + geom_abline(xintercept = 0, slope = 1) + labs(x= "Random Allocation", y = "Our Procedure") + facet_wrap(~ varnum) 

## Summary:
100*mean(d.out$blocked == 1) ## bT: percent of classes have all 6 groups evenly split between men and women
100*mean(d.out$crand == 1) ## RA: percent of classes have all 6 groups evenly split between men and women
mean(d.out$allmw > 0) ## RA: at least one homogeneous group

